package xyz.docbleach.module.pdf;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.pdfbox.cos.*;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDDestinationOrAction;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
import org.apache.pdfbox.pdmodel.interactive.action.*;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import xyz.docbleach.api.BleachSession;
import xyz.docbleach.api.bleach.Bleach;
import xyz.docbleach.api.exception.BleachException;
import xyz.docbleach.api.threat.Threat;
import xyz.docbleach.api.threat.ThreatAction;
import xyz.docbleach.api.threat.ThreatSeverity;
import xyz.docbleach.api.threat.ThreatType;
import xyz.docbleach.api.util.StreamUtils;
import java.io.*;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Function;
import static xyz.docbleach.api.threat.ThreatBuilder.threat;
/**
* PDF parsing is a bit tricky: everything may or may not be linked to additional actions, so we
* need to treat each and every element.
*/
public class PdfBleach implements Bleach {
// Class-wide SLF4J logger.
private static final Logger LOGGER = LoggerFactory.getLogger(PdfBleach.class);
// ASCII bytes for "%PDF"; handlesMagic passes them to StreamUtils.hasHeader.
private static final byte[] PDF_MAGIC = new byte[]{37, 80, 68, 70};
// Passwords tried, in this order, by getDocument when opening the document.
// NOTE(review): the leading null presumably covers documents without a password - confirm.
private static final String[] COMMON_PASSWORDS = new String[]{
null, "", "test", "example", "sample", "malware", "infected", "password"
};
// Memory setting handed to every ScratchFile created in getDocument.
// NOTE(review): 1024 * 100 is presumably the main-memory budget in bytes before PDFBox
// spills to a temporary file - confirm against the MemoryUsageSetting.setupMixed docs.
private static final MemoryUsageSetting MEMORY_USAGE_SETTING = MemoryUsageSetting.setupMixed(1024 * 100);
/**
 * Tells whether the given stream starts with the PDF magic header.
 *
 * @param stream the stream to inspect; the check itself is delegated to
 *               {@link StreamUtils#hasHeader}
 * @return the result of the header comparison against {@code PDF_MAGIC}
 */
@Override
public boolean handlesMagic(InputStream stream) {
    final boolean startsWithPdfHeader = StreamUtils.hasHeader(stream, PDF_MAGIC);
    return startsWithPdfHeader;
}
/**
 * Returns the display name of this bleach.
 *
 * @return the constant string {@code "PDF Bleach"}
 */
@Override
public String getName() {
return "PDF Bleach";
}
/**
 * Sanitizes the PDF read from {@code inputStream} and writes the result to
 * {@code outputStream}.
 *
 * <p>The input is wrapped in a {@link RandomAccessBufferedFileInputStream}, which is closed
 * when this method returns.
 *
 * @param inputStream  the PDF to sanitize
 * @param outputStream where the sanitized PDF is written
 * @param session      the session receiving the recorded threats
 * @throws BleachException if sanitizing fails; any {@link IOException} is wrapped
 */
@Override
public void sanitize(InputStream inputStream, OutputStream outputStream, BleachSession session) throws BleachException {
    try (RandomAccessRead randomAccess = new RandomAccessBufferedFileInputStream(inputStream)) {
        sanitize(randomAccess, outputStream, session);
    } catch (IOException ioFailure) {
        throw new BleachException(ioFailure);
    }
}
/**
 * Opens the document, runs every sanitizing pass on it and saves the result.
 *
 * <p>The document is managed by try-with-resources so that it is closed even when one of the
 * passes or the save throws; previously it was only closed on the success path.
 *
 * @param source       random-access view of the original PDF
 * @param outputStream where the sanitized PDF is written
 * @param session      the session receiving the recorded threats
 * @throws IOException     if reading, sanitizing or saving the document fails
 * @throws BleachException if the document cannot be opened with any known password
 */
private void sanitize(RandomAccessRead source, OutputStream outputStream, BleachSession session) throws IOException, BleachException {
    try (PDDocument doc = getDocument(source)) {
        final PDDocumentCatalog docCatalog = doc.getDocumentCatalog();

        sanitizeNamed(session, doc, docCatalog.getNames());
        sanitizeOpenAction(session, docCatalog);
        sanitizeDocumentActions(session, docCatalog.getActions());
        sanitizePageActions(session, docCatalog.getPages());
        sanitizeAcroFormActions(session, docCatalog.getAcroForm());
        sanitizeDocumentOutline(session, docCatalog.getDocumentOutline());
        // Last and most generic pass: walks every object of the document.
        sanitizeObjects(session, doc.getDocument().getObjects());

        doc.save(outputStream);
    }
}
/**
 * Sanitizes every direct child of the document outline.
 *
 * @param session         the session receiving the recorded threats
 * @param documentOutline the outline to process; ignored when {@code null} or childless
 */
private void sanitizeDocumentOutline(BleachSession session, PDDocumentOutline documentOutline) {
    if (documentOutline == null || !documentOutline.hasChildren()) {
        return;
    }
    for (PDOutlineItem child : documentOutline.children()) {
        sanitizeDocumentOutlineItem(session, child);
    }
}
/**
 * Removes the action of an outline item and of every item nested below it.
 *
 * <p>Outline items can themselves have children. Only looking at the given item would leave
 * the actions of nested items untouched, so the whole sub-tree is walked. The walk is
 * iterative and remembers the dictionaries it has already seen, so a malformed outline that
 * links back to an already visited item cannot make it loop forever.
 *
 * @param session the session receiving one recorded threat per removed action
 * @param item    the outline item at the root of the sub-tree to sanitize
 */
private void sanitizeDocumentOutlineItem(BleachSession session, PDOutlineItem item) {
    // Identity-based: two wrappers around the same underlying dictionary count as one item.
    final java.util.Set<COSDictionary> seen =
            java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<COSDictionary, Boolean>());
    final java.util.Deque<PDOutlineItem> pending = new java.util.ArrayDeque<>();

    seen.add(item.getCOSObject());
    pending.push(item);

    while (!pending.isEmpty()) {
        final PDOutlineItem current = pending.pop();

        final PDAction action = current.getAction();
        if (action != null) {
            LOGGER.debug("Found&removed action on outline item (was {})", action);
            current.setAction(null);
            recordJavascriptThreat(session, "DocumentOutline Item Action", "Action");
        }

        for (PDOutlineItem child : current.children()) {
            if (!seen.add(child.getCOSObject())) {
                // Already queued through another path: stop following this sibling chain.
                break;
            }
            pending.push(child);
        }
    }
}
/**
 * Sanitizes the document's name dictionary: embedded files are bleached and the JavaScript
 * name tree is dropped.
 *
 * <p>When a JavaScript name tree is present, its removal is now reported to the session as a
 * threat, like every other removal performed by this class.
 *
 * @param session the session receiving the recorded threats
 * @param doc     the document being sanitized
 * @param names   the name dictionary; ignored when {@code null}
 */
private void sanitizeNamed(BleachSession session, PDDocument doc, PDDocumentNameDictionary names) {
    if (names == null) {
        return;
    }

    sanitizeRecursiveNameTree(names.getEmbeddedFiles(), fileSpec -> sanitizeEmbeddedFile(session, doc, fileSpec));

    final PDNameTreeNode<PDActionJavaScript> javascriptTree = names.getJavaScript();
    sanitizeRecursiveNameTree(javascriptTree, action -> sanitizeJavascript(session, doc, action));

    // The entry is cleared unconditionally, as before.
    names.setJavascript(null);

    if (javascriptTree != null) {
        LOGGER.debug("Found and removed the JavaScript name tree");
        recordJavascriptThreat(session, "Named JavaScriptAction", "Action");
    }
}
/**
 * Logs a JavaScript action found in the JavaScript name tree. The action itself is left
 * untouched by this method.
 *
 * @param session not used by this method
 * @param doc     not used by this method
 * @param action  the JavaScript action whose content is logged
 */
private void sanitizeJavascript(BleachSession session, PDDocument doc, PDActionJavaScript action) {
    final String script = action.getAction();
    LOGGER.debug("Found JS Action: {}", script);
    // @TODO: find samples and check what actions could be taken. For now, we remove the named tree.
}
/**
 * Applies {@code callback} to every value stored in a name tree, node by node.
 *
 * <p>The values of the given node are visited first, then each kid is processed recursively.
 * If the names of a node cannot be read, the error is logged and that node (including its
 * kids) is skipped.
 *
 * @param efTree   the name tree node to walk; ignored when {@code null}
 * @param callback invoked once per value found
 * @param <T>      the type of the values stored in the tree
 */
private <T extends COSObjectable> void sanitizeRecursiveNameTree(PDNameTreeNode<T> efTree, Consumer<T> callback) {
    if (efTree == null) {
        return;
    }

    final Map<String, T> entries;
    try {
        entries = efTree.getNames();
    } catch (IOException e) {
        LOGGER.error("Error in sanitizeRecursiveNameTree", e);
        return;
    }

    if (entries != null) {
        for (T value : entries.values()) {
            callback.accept(value);
        }
    }

    final List<PDNameTreeNode<T>> kids = efTree.getKids();
    if (kids != null) {
        for (PDNameTreeNode<T> kid : kids) {
            sanitizeRecursiveNameTree(kid, callback);
        }
    }
}
/**
 * Replaces each embedded file variant of a file specification (generic, DOS, Mac, Unicode
 * and Unix) with its sanitized counterpart.
 *
 * <p>A variant that is absent, or whose sanitization fails, ends up set to {@code null}.
 *
 * @param session  the session used to sanitize the embedded content
 * @param doc      the document that will own the sanitized embedded files
 * @param fileSpec the file specification to update in place
 */
private void sanitizeEmbeddedFile(BleachSession session, PDDocument doc, PDComplexFileSpecification fileSpec) {
    LOGGER.trace("Embedded file found: {}", fileSpec.getFilename());

    final PDEmbeddedFile generic = sanitizeEmbeddedFile(session, doc, fileSpec.getEmbeddedFile());
    fileSpec.setEmbeddedFile(generic);

    final PDEmbeddedFile dos = sanitizeEmbeddedFile(session, doc, fileSpec.getEmbeddedFileDos());
    fileSpec.setEmbeddedFileDos(dos);

    final PDEmbeddedFile mac = sanitizeEmbeddedFile(session, doc, fileSpec.getEmbeddedFileMac());
    fileSpec.setEmbeddedFileMac(mac);

    final PDEmbeddedFile unicode = sanitizeEmbeddedFile(session, doc, fileSpec.getEmbeddedFileUnicode());
    fileSpec.setEmbeddedFileUnicode(unicode);

    final PDEmbeddedFile unix = sanitizeEmbeddedFile(session, doc, fileSpec.getEmbeddedFileUnix());
    fileSpec.setEmbeddedFileUnix(unix);
}
/**
 * Runs the content of an embedded file through the session and wraps the sanitized bytes in
 * a new embedded file; the original is then emptied.
 *
 * @param session the session used to sanitize the embedded content
 * @param doc     the document that will own the new embedded file
 * @param file    the embedded file to sanitize, may be {@code null}
 * @return the sanitized embedded file, or {@code null} when {@code file} is {@code null} or
 *         when reading, sanitizing or re-creating the file fails (the failure is logged)
 */
private PDEmbeddedFile sanitizeEmbeddedFile(BleachSession session, PDDocument doc, PDEmbeddedFile file) {
    if (file == null) {
        return null;
    }
    LOGGER.debug("Sanitizing file: Size: {}, Mime-Type: {}, ", file.getSize(), file.getSubtype());

    // Step 1: read the original content.
    final byte[] originalContent;
    try {
        originalContent = file.toByteArray();
    } catch (IOException e) {
        LOGGER.error("Error during original's file read", e);
        return null;
    }

    // Step 2: sanitize it through the session.
    final ByteArrayOutputStream sanitizedContent = new ByteArrayOutputStream();
    try {
        session.sanitize(new ByteArrayInputStream(originalContent), sanitizedContent);
    } catch (BleachException e) {
        LOGGER.error("Error during the bleach process", e);
        return null;
    }

    // Step 3: build the replacement embedded file from the sanitized bytes.
    final PDEmbeddedFile replacement;
    try {
        replacement = new PDEmbeddedFile(doc, new ByteArrayInputStream(sanitizedContent.toByteArray()));
        replacement.setCreationDate(file.getCreationDate());
        replacement.setModDate(file.getModDate());
    } catch (IOException e) {
        LOGGER.error("Error when creating the new sane file", e);
        return null;
    }

    // Carry over the remaining properties of the original embedded file.
    replacement.setSubtype(file.getSubtype());
    replacement.setSize(sanitizedContent.size());
    replacement.setMacCreator(file.getMacCreator());
    replacement.setMacResFork(file.getMacResFork());
    replacement.setMacSubtype(file.getMacSubtype());

    // Step 4: detach and empty the original file.
    file.setSize(0);
    file.setFile(null);
    try {
        file.createOutputStream().close();
    } catch (IOException e) {
        // Not severe, don't abort operations.
        LOGGER.error("Error when trying to empty the original embedded file", e);
    }

    return replacement;
}
/**
 * Moves the source back to its beginning.
 *
 * <p>Uses an absolute seek instead of rewinding by {@code (int) getPosition()}: the narrowing
 * cast produced a wrong offset once the position exceeded {@link Integer#MAX_VALUE}.
 *
 * @param source the source to reposition at offset 0
 * @throws IOException if the source cannot be repositioned
 */
private void rewind(RandomAccessRead source) throws IOException {
    source.seek(0);
}
/**
 * Opens the document by trying each entry of {@code COMMON_PASSWORDS} in order.
 *
 * <p>On the first password that works, the document is protected again with that same
 * password and its current access permissions, then returned; the caller owns it and must
 * close it. Whenever an attempt does not end with a returned document (wrong password, or an
 * exception while parsing or protecting), the scratch file of that attempt and any opened
 * document are closed before moving on or propagating the exception.
 *
 * @param source random-access view of the PDF
 * @return the opened document, never {@code null}
 * @throws IOException     if parsing or protecting the document fails
 * @throws BleachException if none of the known passwords opens the document
 */
private PDDocument getDocument(RandomAccessRead source) throws IOException, BleachException {
    for (String pwd : COMMON_PASSWORDS) {
        final ScratchFile scratchFile = new ScratchFile(MEMORY_USAGE_SETTING);
        PDDocument doc = null;
        boolean handedOver = false;
        try {
            doc = testPassword(scratchFile, source, pwd);
            if (doc != null) {
                LOGGER.debug("Password was guessed: '{}'", pwd);
                doc.protect(new StandardProtectionPolicy(pwd, pwd, doc.getCurrentAccessPermission()));
                handedOver = true;
                return doc;
            }
        } finally {
            if (!handedOver) {
                // Nothing is returned for this attempt: release what it allocated.
                try {
                    if (doc != null) {
                        doc.close();
                    }
                } finally {
                    scratchFile.close();
                }
            }
        }
    }

    // @TODO: fetch password from config?
    throw new BleachException("PDF is protected with an unknown password");
}
/**
 * Tries to parse the source with the given password.
 *
 * <p>The source is rewound after every attempt, successful or not.
 *
 * @param inFile   scratch file handed to the parser
 * @param source   random-access view of the PDF
 * @param password the password to try, may be {@code null}
 * @return the parsed document, or {@code null} when the password is rejected
 * @throws IOException if parsing fails for another reason, or if rewinding fails
 */
@SuppressFBWarnings(value = "EXS_EXCEPTION_SOFTENING_RETURN_FALSE", justification = "This method is an helper to check the password")
private PDDocument testPassword(ScratchFile inFile, RandomAccessRead source, String password) throws IOException {
    final PDFParser pdfParser = new PDFParser(source, password, inFile);
    PDDocument opened = null;
    try {
        pdfParser.parse();
        opened = pdfParser.getPDDocument();
    } catch (InvalidPasswordException rejected) {
        LOGGER.error("The tested password is invalid");
    } finally {
        rewind(source);
    }
    return opened;
}
/**
 * Crawls the object wrapped by each of the given COS objects.
 *
 * @param session the session receiving the recorded threats
 * @param objects the objects whose wrapped content is crawled
 */
public void sanitizeObjects(BleachSession session, List<COSObject> objects) {
    LOGGER.trace("Checking all objects..."); // Most destructive operation
    objects.forEach(wrapper -> crawl(session, wrapper.getObject()));
}
/**
 * Sanitizes every field of an AcroForm: the widgets of each field first, then the field's
 * additional actions when it has some.
 *
 * @param session  the session receiving the recorded threats
 * @param acroForm the form to process; ignored when {@code null}
 */
public void sanitizeAcroFormActions(BleachSession session, PDAcroForm acroForm) {
    LOGGER.trace("Checking AcroForm Actions");
    if (acroForm == null) {
        LOGGER.debug("No AcroForms found");
        return;
    }

    final Iterator<PDField> fieldIterator = acroForm.getFieldIterator();
    while (fieldIterator.hasNext()) {
        final PDField field = fieldIterator.next();

        // Widget annotations attached to the field
        for (PDAnnotationWidget widget : field.getWidgets()) {
            sanitizeAnnotation(session, widget);
        }

        // Additional actions of the field itself
        final PDFormFieldAdditionalActions fieldActions = field.getActions();
        if (fieldActions != null) {
            sanitizeFieldAdditionalActions(session, fieldActions);
        }
    }
}
/**
 * Sanitizes every page of the page tree, in iteration order.
 *
 * @param session the session receiving the recorded threats
 * @param pages   the pages to process
 * @throws IOException if sanitizing a page fails
 */
public void sanitizePageActions(BleachSession session, PDPageTree pages) throws IOException {
    LOGGER.trace("Checking Pages Actions");
    final Iterator<PDPage> pageIterator = pages.iterator();
    while (pageIterator.hasNext()) {
        sanitizePage(session, pageIterator.next());
    }
}
/**
 * Removes the "page closed" (C) and "page opened" (O) actions of a page, recording a threat
 * for each action that was present.
 *
 * @param session     the session receiving the recorded threats
 * @param pageActions the additional actions of a page, updated in place
 */
public void sanitizePageActions(BleachSession session, PDPageAdditionalActions pageActions) {
    final PDAction onClose = pageActions.getC();
    if (onClose != null) {
        LOGGER.debug("Found&removed action when page is closed, was ({})", onClose);
        pageActions.setC(null);
        recordJavascriptThreat(session, "Page Actions", "Action when page is closed");
    }

    final PDAction onOpen = pageActions.getO();
    if (onOpen != null) {
        LOGGER.debug("Found&removed action when page is opened, was ({})", onOpen);
        pageActions.setO(null);
        recordJavascriptThreat(session, "Page Actions", "Action when page is opened");
    }
}
/**
 * Removes the open action of the document catalog, whatever it is, and records a threat
 * when one was present.
 *
 * @param session    the session receiving the recorded threat
 * @param docCatalog the catalog to update in place
 * @throws IOException if the open action cannot be read
 */
public void sanitizeOpenAction(BleachSession session, PDDocumentCatalog docCatalog) throws IOException {
    LOGGER.trace("Checking OpenAction...");
    final PDDestinationOrAction current = docCatalog.getOpenAction();
    if (current != null) {
        LOGGER.debug("Found a JavaScript OpenAction, removed. Was {}", current);
        docCatalog.setOpenAction(null);
        recordJavascriptThreat(session, "Document Catalog", "OpenAction");
    }
}
/**
 * Recursively walks a COS structure and strips JavaScript and additional-action entries
 * from the dictionaries it meets.
 *
 * <p>Names, strings, streams, nulls, object references, numbers and booleans are not
 * descended into. Arrays are walked element by element. In a dictionary, an entry is removed
 * (and a threat recorded) when its key is {@code JS}, {@code JavaScript} or {@code AA}, or
 * when its key is {@code S} and its value is the name {@code JavaScript}; every other entry
 * has its value crawled in turn. Any other type is reported with an error log.
 *
 * @param session the session receiving the recorded threats
 * @param base    the structure to walk; ignored when {@code null}
 */
public void crawl(BleachSession session, COSBase base) {
    if (base == null) {
        return;
    }

    final boolean notTraversed = base instanceof COSName
            || base instanceof COSString
            || base instanceof COSStream
            || base instanceof COSNull
            || base instanceof COSObject
            || base instanceof COSNumber
            || base instanceof COSBoolean;
    if (notTraversed) {
        return;
    }

    if (base instanceof COSArray) {
        for (COSBase element : (COSArray) base) {
            crawl(session, element);
        }
        return;
    }

    if (!(base instanceof COSDictionary)) {
        LOGGER.error("Unknown COS type: {}", base);
        return;
    }

    final Iterator<Map.Entry<COSName, COSBase>> entries = ((COSDictionary) base).entrySet().iterator();
    while (entries.hasNext()) {
        final Map.Entry<COSName, COSBase> entry = entries.next();
        final String key = entry.getKey().getName();
        final COSBase value = entry.getValue();

        if ("JS".equals(key) || "JavaScript".equals(key)) {
            entries.remove();
            LOGGER.debug("Found and removed Javascript code");
            recordJavascriptThreat(session, "?", "JS Code");
        } else if ("S".equals(key)
                && value instanceof COSName
                && "JavaScript".equals(((COSName) value).getName())) {
            LOGGER.debug("Found and removed Javascript code");
            entries.remove();
            recordJavascriptThreat(session, "?", "JS Code");
        } else if ("AA".equals(key)) {
            LOGGER.debug("Found and removed Additionnal Actions");
            entries.remove();
            recordJavascriptThreat(session, "?", "Additional Actions");
        } else {
            crawl(session, value);
        }
    }
}
/**
 * Removes the document-level additional actions (DP, DS, WC, WP, WS), recording a threat
 * for each action that was present.
 *
 * <p>A {@code null} argument is now ignored, in line with the other sanitizing methods of
 * this class that accept possibly missing structures. The "before closing" log message also
 * had an unbalanced parenthesis, which is fixed.
 *
 * @param session         the session receiving the recorded threats
 * @param documentActions the catalog's additional actions, updated in place; may be
 *                        {@code null}
 */
public void sanitizeDocumentActions(BleachSession session, PDDocumentCatalogAdditionalActions documentActions) {
    LOGGER.trace("Checking additional actions...");
    if (documentActions == null) {
        return;
    }

    final PDAction afterPrinting = documentActions.getDP();
    if (afterPrinting != null) {
        LOGGER.debug("Found&removed action after printing (was {})", afterPrinting);
        documentActions.setDP(null);
        recordJavascriptThreat(session, "DocumentCatalogAdditionalActions", "Action after printing");
    }

    final PDAction afterSaving = documentActions.getDS();
    if (afterSaving != null) {
        LOGGER.debug("Found&removed action after saving (was {})", afterSaving);
        documentActions.setDS(null);
        recordJavascriptThreat(session, "DocumentCatalogAdditionalActions", "Action after saving");
    }

    final PDAction beforeClosing = documentActions.getWC();
    if (beforeClosing != null) {
        LOGGER.debug("Found&removed action before closing (was {})", beforeClosing);
        documentActions.setWC(null);
        recordJavascriptThreat(session, "DocumentCatalogAdditionalActions", "Action before closing");
    }

    final PDAction beforePrinting = documentActions.getWP();
    if (beforePrinting != null) {
        LOGGER.debug("Found&removed action before printing (was {})", beforePrinting);
        documentActions.setWP(null);
        recordJavascriptThreat(session, "DocumentCatalogAdditionalActions", "Action before printing");
    }

    final PDAction beforeSaving = documentActions.getWS();
    if (beforeSaving != null) {
        LOGGER.debug("Found&removed action before saving (was {})", beforeSaving);
        documentActions.setWS(null);
        recordJavascriptThreat(session, "DocumentCatalogAdditionalActions", "Action before saving");
    }
}
/**
 * Removes the additional actions of a form field (C, F, K and V), recording a threat for
 * each action that was present.
 *
 * @param session      the session receiving the recorded threats
 * @param fieldActions the field's additional actions, updated in place
 */
public void sanitizeFieldAdditionalActions(BleachSession session, PDFormFieldAdditionalActions fieldActions) {
    final PDAction calculate = fieldActions.getC();
    if (calculate != null) {
        LOGGER.debug("Found&removed an action to be performed in order to recalculate the value of this field when that of another field changes.");
        fieldActions.setC(null);
        recordJavascriptThreat(session, "FormAdditionalActions", "Action on value change");
    }

    final PDAction format = fieldActions.getF();
    if (format != null) {
        LOGGER.debug("Found&removed an action to be performed before the field is formatted to display its current value.");
        fieldActions.setF(null);
        recordJavascriptThreat(session, "FormAdditionalActions", "Action to format the value");
    }

    final PDAction keystroke = fieldActions.getK();
    if (keystroke != null) {
        LOGGER.debug("Found&removed an action to be performed when the user types a keystroke into a text field or combo box or modifies the selection in a scrollable list box.");
        fieldActions.setK(null);
        recordJavascriptThreat(session, "FormAdditionalActions", "Action when the user types a keystoke");
    }

    final PDAction validate = fieldActions.getV();
    if (validate != null) {
        LOGGER.debug("Found&removed an action to be action to be performed when the field's value is changed.");
        fieldActions.setV(null);
        recordJavascriptThreat(session, "FormAdditionalActions", "Action when the field's value is changed");
    }
}
/**
 * Sanitizes a page: each of its annotations, then the page's own additional actions.
 *
 * <p>The page actions are handled once, outside the annotation loop. They used to be handled
 * inside it, so a page without annotations never had its actions removed by this method, and
 * a page with several annotations was processed repeatedly.
 *
 * @param session the session receiving the recorded threats
 * @param page    the page to sanitize in place
 * @throws IOException if the page's annotations cannot be read
 */
public void sanitizePage(BleachSession session, PDPage page) throws IOException {
    for (PDAnnotation annotation : page.getAnnotations()) {
        sanitizeAnnotation(session, annotation);
    }
    sanitizePageActions(session, page.getActions());
}
/**
 * Removes the action attached to a link annotation and records a threat when one was
 * present.
 *
 * @param session        the session receiving the recorded threat
 * @param annotationLink the link annotation to update in place
 */
public void sanitizeLinkAnnotation(BleachSession session, PDAnnotationLink annotationLink) {
    final PDAction linkAction = annotationLink.getAction();
    if (linkAction != null) {
        LOGGER.debug("Found&removed annotation link - action, was {}", linkAction);
        recordJavascriptThreat(session, "Annotation", "External link");
        annotationLink.setAction(null);
    }
}
/**
 * Removes the action attached to a widget annotation, recording a threat when one was
 * present, then sanitizes the widget's additional actions.
 *
 * @param session          the session receiving the recorded threats
 * @param annotationWidget the widget annotation to update in place
 */
public void sanitizeWidgetAnnotation(BleachSession session, PDAnnotationWidget annotationWidget) {
    final PDAction widgetAction = annotationWidget.getAction();
    if (widgetAction != null) {
        LOGGER.debug("Found&Removed action on annotation widget, was {}", widgetAction);
        recordJavascriptThreat(session, "Annotation", "External widget");
        annotationWidget.setAction(null);
    }

    final PDAnnotationAdditionalActions additionalActions = annotationWidget.getActions();
    sanitizeAnnotationActions(session, additionalActions);
}
/**
 * Removes the additional actions of an annotation (Bl, D, E, Fo, PC, PI, PO, PV, U and X),
 * recording a threat for each action that was present.
 *
 * @param session                     the session receiving the recorded threats
 * @param annotationAdditionalActions the additional actions to update in place; ignored
 *                                    when {@code null}
 */
public void sanitizeAnnotationActions(BleachSession session, PDAnnotationAdditionalActions annotationAdditionalActions) {
    if (annotationAdditionalActions == null) {
        return;
    }

    final PDAction onBlur = annotationAdditionalActions.getBl();
    if (onBlur != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the annotation loses the input focus, was {}", onBlur);
        recordJavascriptThreat(session, "Annotation", "Action when annotation loses the input focus");
        annotationAdditionalActions.setBl(null);
    }

    final PDAction onMouseDown = annotationAdditionalActions.getD();
    if (onMouseDown != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the mouse button is pressed inside the annotation's active area, was {}", onMouseDown);
        annotationAdditionalActions.setD(null);
        recordJavascriptThreat(session, "Annotation", "Action when mouse button is pressed inside the annotation's active area");
    }

    final PDAction onEnter = annotationAdditionalActions.getE();
    if (onEnter != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the cursor enters the annotation's active area, was {}", onEnter);
        annotationAdditionalActions.setE(null);
        recordJavascriptThreat(session, "Annotation", "Action when the cursor enters the annotation's active area");
    }

    final PDAction onFocus = annotationAdditionalActions.getFo();
    if (onFocus != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the annotation receives the input focus, was {}", onFocus);
        annotationAdditionalActions.setFo(null);
        recordJavascriptThreat(session, "Annotation", "Action when the annotation receives the input focus");
    }

    final PDAction onPageClose = annotationAdditionalActions.getPC();
    if (onPageClose != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the page containing the annotation is closed, was {}", onPageClose);
        annotationAdditionalActions.setPC(null);
        recordJavascriptThreat(session, "Annotation", "Action when the page containing the annotation is closed");
    }

    final PDAction onPageInvisible = annotationAdditionalActions.getPI();
    if (onPageInvisible != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the page containing the annotation is no longer visible in the viewer application's user interface, was {}", onPageInvisible);
        annotationAdditionalActions.setPI(null);
        recordJavascriptThreat(session, "Annotation", "Action when the page containing the annotation is no longer visible");
    }

    final PDAction onPageOpen = annotationAdditionalActions.getPO();
    if (onPageOpen != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the page containing the annotation is opened, was {}", onPageOpen);
        annotationAdditionalActions.setPO(null);
        recordJavascriptThreat(session, "Annotation", "Action when the page containing the annotation is opened");
    }

    final PDAction onPageVisible = annotationAdditionalActions.getPV();
    if (onPageVisible != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the page containing the annotation becomes visible in the viewer application's user interface, was {}", onPageVisible);
        annotationAdditionalActions.setPV(null);
        recordJavascriptThreat(session, "Annotation", "Action the page containing the annotation becomes visible");
    }

    final PDAction onMouseUp = annotationAdditionalActions.getU();
    if (onMouseUp != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the mouse button is released inside the annotation's active area, was {}", onMouseUp);
        annotationAdditionalActions.setU(null);
        recordJavascriptThreat(session, "Annotation", "Action when the mouse button is released inside the annotation's active area");
    }

    final PDAction onExit = annotationAdditionalActions.getX();
    if (onExit != null) {
        LOGGER.debug("Found&Removed action on annotation widget to be performed when the cursor exits the annotation's active area, was {}", onExit);
        annotationAdditionalActions.setX(null);
        recordJavascriptThreat(session, "Annotation", "Action when the cursor exits the annotation's active area");
    }
}
/**
 * Dispatches an annotation to the sanitizer matching its type. Link and widget annotations
 * are handled; any other annotation is left untouched.
 *
 * @param session    the session receiving the recorded threats
 * @param annotation the annotation to sanitize in place
 */
public void sanitizeAnnotation(BleachSession session, PDAnnotation annotation) {
    if (annotation instanceof PDAnnotationLink) {
        final PDAnnotationLink link = (PDAnnotationLink) annotation;
        sanitizeLinkAnnotation(session, link);
    } else if (annotation instanceof PDAnnotationWidget) {
        final PDAnnotationWidget widget = (PDAnnotationWidget) annotation;
        sanitizeWidgetAnnotation(session, widget);
    }
}
/**
 * Records on the session a removed, high-severity, active-content threat.
 *
 * @param session  the session the threat is recorded on
 * @param location where the threat was found
 * @param details  a description of what was removed
 */
private void recordJavascriptThreat(BleachSession session, String location, String details) {
    session.recordThreat(
            threat()
                    .type(ThreatType.ACTIVE_CONTENT)
                    .severity(ThreatSeverity.HIGH)
                    .details(details)
                    .location(location)
                    .action(ThreatAction.REMOVE)
                    .build());
}
}